In [1]:

    
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline



In [2]:

    
# Load the app data set from a pickle file (presumably the output of an earlier
# cleaning step, going by the file name).
# NOTE(review): this is an absolute path on the original author's machine, so
# the notebook will not run elsewhere without editing it.
# NOTE(review): unpickling can execute arbitrary code; only load pickle files
# that come from a trusted source.
app_pickle_path = '/Users/krystal/Desktop/app_cleaned.pickle'
app = pd.read_pickle(app_pickle_path)

# Show the first five rows as a quick sanity check of the loaded frame.
app.head()









    Out[2]:






  
    
      
      category
      current_rating
      description
      id
      is_InAppPurcased
      is_multilingual
      is_multiplatform
      name
      new_version_desc
      num_current_rating
      ...
      review2
      review2_star
      review3
      review3_star
      scrape_date
      seller
      size
      update_date
      url
      version
    
  
  
    
      0
      Books
      NaN
      ~ ~ >   A   m a g i c a l   f a n t a s y   - ...
      616686830
      1
      0
      0
      The Little Mermaid - A Free Interactive Childr...
      >   H e y   T a b T a l e r s ,   g r e a t   ...
      NaN
      ...
      None
      NaN
      None
      NaN
      2017-03-11
      TabTale LTD
      75.0 MB
      Oct 15, 2015
      https://itunes.apple.com/us/app/little-mermaid...
      1.4
    
    
      1
      Books
      4.500
      
      445211116
      1
      1
      0
      Obeikan Store
      
      16.0
      ...
      It's greet app .
      5.0
      None
      NaN
      2017-03-11
      new Thinkers
      32.6 MB
      Jan 06, 2014
      https://itunes.apple.com/us/app/obeikan-store/...
      5.0
    
    
      2
      Books
      NaN
      W e i r d   &   F u n n y   T o n g u e   T w ...
      427342569
      1
      0
      0
      A-Z Tongue Twisters! The Best Fun & Funny Atte...
      U s e r   I n t e r f a c e   i m p r o v e m ...
      NaN
      ...
      I love it
      5.0
      None
      NaN
      2017-03-11
      Michael Quach
      9.7 MB
      Sep 04, 2014
      https://itunes.apple.com/us/app/z-tongue-twist...
      2.2.5
    
    
      3
      Books
      NaN
      
      462186890
      1
      0
      0
      网易云阅读-电子书城免费小说新闻一站阅读
      2 0 1 7
      NaN
      ...
      
      5.0
      
      5.0
      2017-03-11
      NetEase (Hangzhou) Network Co., Ltd.
      88.3 MB
      Jan 20, 2017
      https://itunes.apple.com/us/app/%E7%BD%91%E6%9...
      5.2.4
    
    
      4
      Books
      4.875
      i P h o n e  i P a d...
      952059546
      1
      0
      0
      微信读书
      1 .
      8.0
      ...
      qq
      4.0
      1
      5.0
      2017-03-11
      Tencent Technology (Shenzhen) Company Limited
      75.4 MB
      Feb 13, 2017
      https://itunes.apple.com/us/app/%E5%BE%AE%E4%B...
      1.5.2
    
  

5 rows × 26 columns



In [ ]:

    
# Remove rows that are exact duplicates of an earlier row, then renumber the
# index 0..n-1. drop_duplicates() keeps the surviving rows' original labels,
# which leaves gaps wherever a row was dropped; renumbering keeps lookups by
# position-like label (app[col][i] for i in range(len(app))) valid.
app = app.drop_duplicates().reset_index(drop=True)



In [ ]:

    
# Turn the textual `size` column (strings such as "75.0 MB") into plain numbers
# expressed in megabytes.
#
# The whole column is converted at once instead of assigning element by element
# with `app['size'][i] = ...`:
#   * that chained assignment may write to a temporary copy and is not
#     guaranteed to update `app`;
#   * `app['size'][i]` looks rows up by index *label*, so it breaks as soon as
#     the index is not exactly 0..n-1 (e.g. after rows have been dropped);
#   * a second run of the cell would try to slice floats and fail.

# Multiplier that converts a value in the given unit to megabytes. "GB" uses
# the same decimal factor (1000) as before; "KB" is handled as well so that a
# size given in kilobytes is not mistaken for megabytes.
size_unit_to_mb = {'KB': 0.001, 'MB': 1.0, 'GB': 1000.0}

# Skip the work when the column is already numeric, so the cell can be re-run.
if not pd.api.types.is_numeric_dtype(app['size']):
    size_unit = app['size'].str[-2:]                  # trailing two-letter unit
    size_number = app['size'].str[:-3].astype(float)  # text before " <unit>"
    # Any unit missing from the table keeps a factor of 1, i.e. the number is
    # taken as-is (the same fallback the element-wise version used).
    size_factor = size_unit.map(size_unit_to_mb).fillna(1.0)
    app = app.assign(size=size_number * size_factor)

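Convert the app sizes to a common unit: sizes given in GB are multiplied by 1000 to express them in MB, and the unit suffix is stripped so that the column holds plain numbers.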



In [22]:

    
# Columns needed for the size-vs-rating analysis.
rating_columns = [
    "name",
    "size",
    "overall_rating",
    "current_rating",
    'num_current_rating',
    "num_overall_rating",
]

# Keep only those columns and discard every row that has a missing value in
# any of them.
rating_df = app.loc[:, rating_columns].dropna()



In [23]:

    
# Lookup table from the textual star labels to their numeric value.
rating_cleaned = {
    '1 star': 1,
    "1 and a half stars": 1.5,
    '2 stars': 2,
    '2 and a half stars': 2.5,
    "3 stars": 3,
    "3 and a half stars": 3.5,
    "4 stars": 4,
    '4 and a half stars': 4.5,
    "5 stars": 5,
}



In [24]:

    
# Translate the textual star labels in `overall_rating` into numbers using the
# `rating_cleaned` lookup table.
# replace() leaves any value without an entry in the table untouched and,
# depending on the pandas version, may keep the column as object dtype.
# pd.to_numeric guarantees a numeric column and raises here if a label could
# not be converted, instead of failing later inside the rating arithmetic.
# Bracket assignment is used because attribute-style assignment only works for
# columns that already exist and is easy to misread.
rating_df['overall_rating'] = pd.to_numeric(
    rating_df['overall_rating'].replace(rating_cleaned)
)



In [25]:

    
# Share of all ratings that were given to the current version.
# NOTE(review): a row with num_overall_rating == 0 would yield inf/NaN here;
# confirm no such rows survive the earlier dropna().
current_share = np.divide(rating_df['num_current_rating'], rating_df['num_overall_rating'])

# weighted_rating = share * current_rating + (1 - share) * overall_rating
rating_df['weighted_rating'] = (
    current_share * rating_df['current_rating']
    + (1 - current_share) * rating_df['overall_rating']
)

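Add a `weighted_rating` column to the data set as a measure of app quality: it averages the current-version rating and the overall rating, weighted by the share of ratings that belong to the current version.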



In [27]:

    
# Scatter plot of app size (x) against the weighted rating (y) for every row
# of rating_df, drawn on an explicitly created figure/axes pair.
fig, ax = plt.subplots()
ax.scatter(rating_df['size'], rating_df['weighted_rating'])
ax.set_xlabel('Size of app')
ax.set_ylabel('Quality of app')
ax.set_title('Relationship between app size and quality')
plt.show()



In [28]:

    
# Restrict the data to apps whose `size` value is at most the cutoff, so the
# bulk of the points is not squeezed together by a few very large apps.
# NOTE(review): the cutoff is in whatever unit `size` holds at this point
# (megabytes, judging by the "MB"/"GB" strings in the raw column) - confirm.
size_cutoff = 500
is_within_cutoff = rating_df['size'] <= size_cutoff
rating_df_2 = rating_df.loc[is_within_cutoff]



In [29]:

    
# Same scatter plot as for the full data, but using only the size-filtered
# subset in rating_df_2, drawn on an explicitly created figure/axes pair.
fig, ax = plt.subplots()
ax.scatter(rating_df_2['size'], rating_df_2['weighted_rating'])
ax.set_xlabel('Size of app')
ax.set_ylabel('Quality of app')
ax.set_title('Relationship between app size(less than 500) and quality')
plt.show()

I draw scatter plots of app size against the weighted rating of each app. The second plot only contains apps with a size of at most 500 MB. I find that there is a positive association between app size and the weighted rating. Further analysis is still needed.



In [ ]:

	category	current_rating	description	id	is_InAppPurcased	is_multilingual	name	new_version_desc	num_current_rating	...	review2	review2_star	review3	review3_star	scrape_date	seller	size	update_date	url	version
0	Books	NaN	~ ~ > A m a g i c a l f a n t a s y - ...	616686830	1	0	The Little Mermaid - A Free Interactive Childr...	> H e y T a b T a l e r s , g r e a t ...	NaN	...	None	NaN	None	NaN	2017-03-11	TabTale LTD	75.0 MB	Oct 15, 2015	https://itunes.apple.com/us/app/little-mermaid...	1.4
1	Books	4.500		445211116	1	1	Obeikan Store		16.0	...	It's greet app .	5.0	None	NaN	2017-03-11	new Thinkers	32.6 MB	Jan 06, 2014	https://itunes.apple.com/us/app/obeikan-store/...	5.0
2	Books	NaN	W e i r d & F u n n y T o n g u e T w ...	427342569	1	0	A-Z Tongue Twisters! The Best Fun & Funny Atte...	U s e r I n t e r f a c e i m p r o v e m ...	NaN	...	I love it	5.0	None	NaN	2017-03-11	Michael Quach	9.7 MB	Sep 04, 2014	https://itunes.apple.com/us/app/z-tongue-twist...	2.2.5
3	Books	NaN		462186890	1	0	网易云阅读-电子书城免费小说新闻一站阅读	2 0 1 7	NaN	...		5.0		5.0	2017-03-11	NetEase (Hangzhou) Network Co., Ltd.	88.3 MB	Jan 20, 2017	https://itunes.apple.com/us/app/%E7%BD%91%E6%9...	5.2.4
4	Books	4.875	i P h o n e i P a d...	952059546	1	0	微信读书	1 .	8.0	...	qq	4.0	1	5.0	2017-03-11	Tencent Technology (Shenzhen) Company Limited	75.4 MB	Feb 13, 2017	https://itunes.apple.com/us/app/%E5%BE%AE%E4%B...	1.5.2